<?php

/**
 * Parses files in MHT (MHTML web archive) format.
 *  Usage example:
 *   
 *  
  function mhtmlParseBody($filename) {

  if (file_exists($filename)) {
  if (is_dir($filename))
  return false;

  $filename = strtolower($filename);
  if (strpos($filename, '.mht', 1) == FALSE)
  return false;


  $o_mhtml = new mhtml ();
  $o_mhtml->set_file($filename);
  $o_mhtml->extract();
  return $o_mhtml->get_part_to_file(0);
  }
  return null;
  }

  function mhtmlParseAll($filename) {

  if (file_exists($filename)) {
  if (is_dir($filename))
  return false;

  $filename = strtolower($filename);
  if (strpos($filename, '.mht', 1) == FALSE)
  return false;


  $o_mhtml = new mhtml ();
  $o_mhtml->set_file($filename);
  $o_mhtml->extract();
  return $o_mhtml->get_all_part_file();
  }
  return null;
  }
 */

namespace html\mht;

/**
 * Minimal reader for MHT (MHTML) web-archive files.
 *
 * Typical use: set_file() -> extract() -> get_part_to_file($i) or
 * get_all_part_file(). extract() loads the file, locates the multipart
 * boundary in the leading headers and splits the file into raw parts.
 */
class mhtml {

    /** @var string Path of the file to parse; assigned through set_file(). */
    public $file = '';

    /** @var string Part delimiter ("--" followed by the boundary token), or '' when none was found. */
    public $boundary = '';

    /** @var string|array Raw file contents after read_filedata(); list of raw part strings after file_parts(). */
    public $filedata = '';

    /** @var int Number of entries in $filedata after splitting (the fragment after the closing delimiter is counted too). */
    public $countparts = 1;

    /** @var string Text returned by get_log(); nothing in this class writes to it. */
    public $log = '';

    /**
     * Loads the configured file and splits it into its parts.
     *
     * @return int|bool 1 on success, FALSE when the file could not be read
     *                  or does not start with the expected header lines.
     */
    public function extract() {
        if ($this->read_filedata() === FALSE) {
            return FALSE;
        }
        $this->file_parts();

        return 1;
    }

    /**
     * Sets the path of the file to parse.
     *
     * @param string $p File path.
     */
    public function set_file($p) {
        $this->file = $p;
    }

    /**
     * @return string Current content of the log property.
     */
    public function get_log() {
        return $this->log;
    }

    /**
     * Finds the multipart boundary in the first 8 KiB of the loaded data and
     * splits $filedata into a zero-based list of raw parts (headers + body).
     *
     * Everything before the first delimiter is discarded. When no boundary
     * is found, $filedata becomes a one-element list holding the whole file.
     */
    public function file_parts() {
        if (!is_string($this->filedata)) {
            // Already split (or nothing loaded): nothing to do.
            return;
        }

        $lines = explode("\n", substr($this->filedata, 0, 8192));
        foreach ($lines as $line) {
            $line = trim($line);
            if ($line === '' || strpos($line, 'boundary') === FALSE) {
                continue;
            }
            $token = $this->parse_boundary($line);
            if ($token === '') {
                // The word "boundary" appeared without a usable value; keep scanning.
                continue;
            }
            $this->boundary = '--' . $token;
            // Drop the declaring header line from the data, as the original code did.
            $this->filedata = str_replace($line, '', $this->filedata);
            break;
        }

        if ($this->boundary != '') {
            $parts = explode($this->boundary, $this->filedata);
            // Element 0 is the top-level header block / preamble, not a part.
            unset($parts[0]);
            $this->filedata = array_values($parts);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($this->filedata);
        }
    }

    /**
     * Extracts the boundary token from a header line containing "boundary=".
     * Supports both the quoted (boundary="token") and unquoted
     * (boundary=token; ...) forms.
     *
     * @param string $line Trimmed header line.
     * @return string Boundary token, or '' when none could be extracted.
     */
    private function parse_boundary($line) {
        $markPos = strpos($line, 'boundary');
        if ($markPos === FALSE) {
            return '';
        }
        $eqPos = strpos($line, '=', $markPos);
        if ($eqPos === FALSE) {
            return '';
        }
        $value = ltrim((string) substr($line, $eqPos + 1));
        if ($value !== '' && $value[0] === '"') {
            $range = $this->getrange($value, '"', '"', 0);
            return ($range === FALSE) ? '' : $range['range'];
        }
        $pieces = explode(';', $value, 2);

        return trim($pieces[0]);
    }

    /**
     * @return string|array The part list built by file_parts() (raw, undecoded),
     *                      or the raw file string if file_parts() has not run.
     */
    public function get_all_part_file() {
        return $this->filedata;
    }

    /**
     * Returns the decoded body of part $i.
     *
     * The part's header lines are read up to the first blank line; only
     * Content-Transfer-Encoding is used. base64 and quoted-printable bodies
     * are decoded (the encoding name is matched case-insensitively); any
     * other body is returned as-is. When no blank line is found, the first
     * line is still treated as a header line and dropped.
     *
     * @param int $i Zero-based part index.
     * @return string|int Decoded body; 1 when the fragment is the remainder
     *                    after the closing delimiter (its first line is "--");
     *                    '' when the part does not exist.
     */
    public function get_part_to_file($i) {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $line_data_start = 0;
        $encoding = '';
        $part_lines = explode("\n", ltrim($this->filedata[$i]));
        foreach ($part_lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                if (trim($part_lines[0]) === '--') {
                    // Text following the final "--boundary--" marker, not a real part.
                    return 1;
                }
                $line_data_start = $line_id;
                break;
            }
            $pos = strpos($line, ':');
            if ($pos !== FALSE) {
                $k = strtolower(trim(substr($line, 0, $pos)));
                if ($k === 'content-transfer-encoding') {
                    $encoding = strtolower(trim((string) substr($line, $pos + 1)));
                }
            }
        }

        // Re-join with "\n" so line breaks of textual bodies survive;
        // base64_decode() ignores the extra line breaks.
        $body = implode("\n", array_slice($part_lines, $line_data_start + 1));
        if ($encoding === 'base64') {
            $body = base64_decode($body);
        } elseif ($encoding === 'quoted-printable') {
            // Core function; avoids depending on the optional IMAP extension.
            $body = quoted_printable_decode($body);
        }

        return $body;
    }

    /**
     * Reads the whole file into $filedata after checking that it looks like
     * an MHT file: the first line must contain "From" and the second line
     * must contain "Subject".
     *
     * @return bool|null FALSE when the file is missing, unreadable or fails
     *                   the header check; no value (NULL) on success.
     */
    public function read_filedata() {
        if (!is_file($this->file) || !is_readable($this->file)) {
            return FALSE;
        }
        $handle = fopen($this->file, 'r');
        if ($handle === FALSE) {
            return FALSE;
        }

        // Check the signature strings on the first two lines.
        $first = fgets($handle, 4096);
        $second = fgets($handle, 4096);
        if ($first === FALSE || strpos($first, 'From') === FALSE
                || $second === FALSE || strpos($second, 'Subject') === FALSE) {
            fclose($handle);
            return FALSE;
        }

        // Go back to the start and load the entire file.
        rewind($handle);
        $data = stream_get_contents($handle);
        fclose($handle);
        if ($data === FALSE) {
            return FALSE;
        }
        $this->filedata = $data;
    }

    /**
     * Returns the text enclosed by a begin mark and its matching end mark,
     * taking nested begin/end pairs into account.
     *
     * Example: for "sssss { x { xx } {xx{xx } x} x} sssss" with marks "{" and
     * "}" the result is range " x { xx } {xx{xx } x} x", begin 7, end 31.
     * When both marks are the same string (e.g. '"'), the text between the
     * first two occurrences is returned.
     *
     * @param string $subject       Text to search (taken by reference, never modified).
     * @param string $Beginmark_str Opening mark; an empty value falls back to '{'.
     * @param string $Endmark_str   Closing mark; an empty value falls back to '}'.
     * @param int    $Start_pos     Offset at which the search starts.
     * @return array|bool array('range' => enclosed text,
     *                          'begin' => offset of the first enclosed character,
     *                          'end'   => offset just past the closing mark),
     *                    or FALSE when no complete range exists.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0) {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $beginLen = strlen($Beginmark_str);
        $endLen = strlen($Endmark_str);

        $Start_pos = (int) $Start_pos;
        if ($Start_pos < 0 || $Start_pos > strlen($subject)) {
            return FALSE;
        }

        $firstMark = strpos($subject, $Beginmark_str, $Start_pos);
        if ($firstMark === FALSE) {
            return FALSE;
        }
        $rangeStart = $firstMark + $beginLen;

        $endPos = strpos($subject, $Endmark_str, $rangeStart);
        if ($endPos === FALSE) {
            return FALSE;
        }

        // Every further begin mark that lies inside the current range opens a
        // nested pair, so the range must extend to the next end mark.
        $scanFrom = $rangeStart;
        while (TRUE) {
            $nested = strpos($subject, $Beginmark_str, $scanFrom);
            if ($nested === FALSE || $nested + $beginLen > $endPos) {
                break;
            }
            $scanFrom = $nested + $beginLen;
            $endPos = strpos($subject, $Endmark_str, $endPos + $endLen);
            if ($endPos === FALSE) {
                // Unbalanced marks: a nested begin without its end.
                return FALSE;
            }
        }

        return array(
            'range' => (string) substr($subject, $rangeStart, $endPos - $rangeStart),
            'begin' => $rangeStart,
            'end' => $endPos + $endLen
        );
    }

    /**
     * Reads a range of lines from a file.
     *
     * @param string $filename  File to read.
     * @param int    $startLine First line to return (1-based).
     * @param int    $endLine   Last line to return (inclusive).
     * @param string $method    Open mode passed to SplFileObject.
     * @return array Lines read, passed through array_filter(), so empty or
     *               falsy entries are removed and the original keys are kept.
     */
    public function getFileLines($filename, $startLine = 1, $endLine = 50, $method = 'rb') {
        $content = array();
        $count = $endLine - $startLine;

        $fp = new \SplFileObject($filename, $method);
        $fp->seek($startLine - 1); // seek() counts lines from 0
        for ($i = 0; $i <= $count; ++$i) {
            $content[] = $fp->current();
            $fp->next();
        }

        return array_filter($content);
    }

}